{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "import os\n",
    "import random\n",
    "import re\n",
    "import time\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "from threading import Semaphore\n",
    "from time import sleep\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from faker import Factory\n",
    "from lxml import etree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getHTMLResponse(url):\n",
    "    \"\"\"Download ``url`` with a randomised User-Agent and return the response.\n",
    "\n",
    "    The request uses a 15 second timeout and any non-2xx status counts as a\n",
    "    failure. After a successful request the calling thread sleeps for a\n",
    "    random 3-8 seconds as a manual rate limit.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the page to download.\n",
    "\n",
    "    Returns:\n",
    "        The ``requests.Response`` on success, or ``None`` when the request\n",
    "        fails (the URL and the reason are printed).\n",
    "    \"\"\"\n",
    "    fc = Factory.create()\n",
    "    headers = {\n",
    "        'Connection': 'keep-alive',\n",
    "        'Cache-Control': 'max-age=0',\n",
    "        'Upgrade-Insecure-Requests': '1',\n",
    "        'User-Agent': fc.user_agent()\n",
    "    }\n",
    "\n",
    "    try:\n",
    "        r = requests.get(url, headers=headers, timeout=15)\n",
    "        r.raise_for_status()\n",
    "    except requests.RequestException as reason:\n",
    "        # Handle network/HTTP failures only: a bare ``except`` would also\n",
    "        # swallow KeyboardInterrupt and hide programming errors.\n",
    "        print(f\"获取html失败: {url} ({reason})\")\n",
    "        return None\n",
    "\n",
    "    r.encoding = r.apparent_encoding\n",
    "    # No crawling framework is used, so throttle by hand.\n",
    "    sleep(random.randint(3, 8))\n",
    "    return r"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getData(content,csv_file):\n",
    "    \"\"\"Parse one listing page and write one CSV row per listing.\n",
    "\n",
    "    Args:\n",
    "        content: HTML of a listing page, in a form ``etree.HTML`` accepts.\n",
    "        csv_file: Object exposing ``writerow`` (e.g. a ``csv.writer``).\n",
    "\n",
    "    Each row is ``[title, house, site, totalPrice, unitPrice, house_info,\n",
    "    followinfo, link]``. A listing that lacks an expected node is reported\n",
    "    and skipped; the remaining listings on the page are still written.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        soup = etree.HTML(content)\n",
    "        text_blocks = soup.xpath(r'//ul[@class=\"sellListContent\"]/li')\n",
    "    except Exception as reason:\n",
    "        print(f\"获取内容异常:{reason.args}\")\n",
    "        return\n",
    "\n",
    "    for text in text_blocks:\n",
    "        # Guard each listing separately so one malformed <li> does not\n",
    "        # discard every listing that follows it on the page.\n",
    "        try:\n",
    "            # Title\n",
    "            title = text.xpath(r'.//div/div[@class=\"title\"]/a/text()')[-1]\n",
    "\n",
    "            # Detail-page link. xpath() returns a list, so pick the URL\n",
    "            # itself; writing the list put its repr \"['https://...']\" in the CSV.\n",
    "            link = text.xpath(r'.//div/div[@class=\"title\"]/a/@href')[-1]\n",
    "\n",
    "            # Residential compound name\n",
    "            house = text.xpath(r'.//div/div[@class=\"flood\"]/div/a[1]/text()')[-1]\n",
    "\n",
    "            # Location\n",
    "            site = text.xpath(r'.//div/div[@class=\"flood\"]/div/a[2]/text()')[-1]\n",
    "\n",
    "            # House details\n",
    "            house_info = text.xpath(r'.//div/div[@class=\"address\"]/div/text()')[0]\n",
    "\n",
    "            # Follower / publication info\n",
    "            followinfo = text.xpath(r'.//div/div[@class=\"followInfo\"]/text()')[0]\n",
    "\n",
    "            # Total price: string(.) joins the text of all nested nodes\n",
    "            totalPrice = text.xpath(r'.//div/div[@class=\"priceInfo\"]/div[1]')[0].xpath('string(.)')\n",
    "\n",
    "            # Price per square metre\n",
    "            unitPrice = text.xpath(r'.//div/div[@class=\"priceInfo\"]/div[2]/span/text()')[-1]\n",
    "\n",
    "            csv_file.writerow([title,house,site,totalPrice,unitPrice,house_info,followinfo,link])\n",
    "        except Exception as reason:\n",
    "            print(f\"获取内容异常:{reason.args}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getPageNum(url):\n",
    "    \"\"\"Build the list of listing-page URLs reachable from ``url``.\n",
    "\n",
    "    Downloads ``url``, reads the ``page-data`` attribute of the pagination\n",
    "    ``div`` and substitutes every page number into the ``/pgN/`` segment.\n",
    "\n",
    "    Args:\n",
    "        url: A listing URL containing a ``/pg<digits>/`` segment.\n",
    "\n",
    "    Returns:\n",
    "        One URL per page (1..totalPage), or ``[url]`` when the page carries\n",
    "        no pagination data.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: If the page cannot be downloaded.\n",
    "    \"\"\"\n",
    "    fc = Factory.create()\n",
    "    headers = {'User-Agent':fc.user_agent()}\n",
    "    # A timeout keeps a stalled connection from hanging the whole run.\n",
    "    r = requests.get(url,headers=headers,timeout=15)\n",
    "    r.raise_for_status()\n",
    "    soup = etree.HTML(r.text)\n",
    "    # xpath() returns a list. Indexing it blindly raised IndexError whenever\n",
    "    # the pagination div was missing, so the single-page fallback never ran.\n",
    "    page_data = []\n",
    "    if soup is not None:\n",
    "        page_data = soup.xpath('//div[@class=\"page-box house-lst-page-box\"]/@page-data')\n",
    "    if not page_data or not page_data[0]:\n",
    "        return [url]\n",
    "\n",
    "    is_exist_pages = page_data[0]\n",
    "    print(is_exist_pages)\n",
    "    # The attribute is JSON supplied by a remote server: parse it, never eval() it.\n",
    "    maxPage = json.loads(is_exist_pages)[\"totalPage\"]\n",
    "    print(\"总页数：\", maxPage)\n",
    "    # re.S used to be passed in the positional ``count`` slot of re.sub; the\n",
    "    # pattern has no '.', so no flag is needed at all.\n",
    "    return [re.sub(r'/pg\\d+/','/pg%d/' % page,url) for page in range(1, maxPage + 1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_savefile(url,FILE_PATH,maxpage=None):\n",
    "    \"\"\"Crawl the listing pages reachable from ``url`` and save them as CSV.\n",
    "\n",
    "    Args:\n",
    "        url: First listing page; handed to ``getPageNum`` to discover pages.\n",
    "        FILE_PATH: Destination CSV path (overwritten, written as UTF-8).\n",
    "        maxpage: Optional cap on the number of pages fetched. ``None``\n",
    "            (the default) fetches every page.\n",
    "    \"\"\"\n",
    "    HEADERS = ['title','house','site','totalPrice','unitPrice','house_info','followinfo','link']\n",
    "    # Timing\n",
    "    start = time.time()\n",
    "    # 1. Visit the first page to learn the page count and build the URL list.\n",
    "    urls = getPageNum(url)\n",
    "    if maxpage is not None:\n",
    "        urls = urls[:maxpage]\n",
    "    # 2. Download the pages and write the extracted rows.\n",
    "    with open(file=FILE_PATH, mode=\"w\", newline=\"\", encoding=\"utf-8\") as file:\n",
    "        # CSV writer bound to the output file\n",
    "        csv_file = csv.writer(file)\n",
    "        # Header row\n",
    "        csv_file.writerow(HEADERS)\n",
    "        # Fetch every page with 4 worker threads first, then parse in page order.\n",
    "        contents = []\n",
    "        with ThreadPoolExecutor(max_workers=4) as executor:\n",
    "            for page_url, data in zip(urls, executor.map(getHTMLResponse, urls)):\n",
    "                if data is None:\n",
    "                    # getHTMLResponse returns None on failure; skip the page\n",
    "                    # instead of crashing on ``None.content``.\n",
    "                    print(f\"{page_url} failed, skipped\")\n",
    "                    continue\n",
    "                contents.append(data.content)\n",
    "                print(f\"{page_url} is finished\")\n",
    "\n",
    "        for content in contents:\n",
    "            getData(content,csv_file)\n",
    "    print(\"finished end!\")\n",
    "    print(\"用时：\", time.time() - start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "    \"\"\"Entry point: crawl the listing from page 1 and save it as a CSV in the current directory.\"\"\"\n",
    "    # Start URL; the /pg1/ segment is the part getPageNum rewrites per page.\n",
    "    start_url = 'https://hz.lianjia.com/ershoufang' + '/pg1/'\n",
    "    # Output location: ./<filename>.csv\n",
    "    filename = \"二手房\"\n",
    "    output_path = os.path.join('.', filename + '.csv')\n",
    "    run_savefile(start_url, output_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"totalPage\":100,\"curPage\":1}\n",
      "总页数： 100\n",
      "https://hz.lianjia.com/ershoufang/pg1/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg2/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg3/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg4/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg5/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg6/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg7/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg8/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg9/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg10/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg11/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg12/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg13/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg14/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg15/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg16/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg17/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg18/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg19/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg20/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg21/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg22/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg23/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg24/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg25/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg26/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg27/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg28/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg29/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg30/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg31/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg32/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg33/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg34/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg35/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg36/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg37/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg38/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg39/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg40/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg41/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg42/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg43/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg44/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg45/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg46/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg47/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg48/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg49/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg50/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg51/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg52/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg53/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg54/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg55/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg56/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg57/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg58/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg59/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg60/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg61/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg62/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg63/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg64/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg65/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg66/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg67/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg68/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg69/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg70/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg71/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg72/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg73/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg74/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg75/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg76/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg77/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg78/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg79/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg80/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg81/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg82/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg83/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg84/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg85/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg86/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg87/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg88/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg89/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg90/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg91/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg92/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg93/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg94/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg95/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg96/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg97/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg98/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg99/ is finished\n",
      "https://hz.lianjia.com/ershoufang/pg100/ is finished\n",
      "finished end!\n",
      "用时： 230.99165439605713\n"
     ]
    }
   ],
   "source": [
    "# Run the full crawl: needs network access and writes the CSV configured in main().\n",
    "main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>house</th>\n",
       "      <th>site</th>\n",
       "      <th>totalPrice</th>\n",
       "      <th>unitPrice</th>\n",
       "      <th>house_info</th>\n",
       "      <th>followinfo</th>\n",
       "      <th>link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>此房满五年，自住精装，品牌家具家电，可拎包入住</td>\n",
       "      <td>华盛星洲翠谷</td>\n",
       "      <td>小和山</td>\n",
       "      <td>190万</td>\n",
       "      <td>单价21840元/平米</td>\n",
       "      <td>2室2厅 | 87平米 | 南 | 精装 | 低楼层(共11层) | 2009年建 | 板楼</td>\n",
       "      <td>58人关注 / 4个月以前发布</td>\n",
       "      <td>['https://hz.lianjia.com/ershoufang/1031080307...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>西湖边 ，精装修南北通透的西边套，1号线定安路地铁</td>\n",
       "      <td>涌金门社区</td>\n",
       "      <td>湖滨</td>\n",
       "      <td>180万</td>\n",
       "      <td>单价46178元/平米</td>\n",
       "      <td>1室1厅 | 38.98平米 | 南 西 北 | 精装 | 高楼层(共6层) | 1985年...</td>\n",
       "      <td>51人关注 / 18天以前发布</td>\n",
       "      <td>['https://hz.lianjia.com/ershoufang/1031104083...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>自主精装，满五唯一，东边套，中高楼层，视野开阔！</td>\n",
       "      <td>水乡华庭</td>\n",
       "      <td>塘栖</td>\n",
       "      <td>159万</td>\n",
       "      <td>单价17646元/平米</td>\n",
       "      <td>2室2厅 | 90.11平米 | 南 北 | 精装 | 高楼层(共12层)  | 板楼</td>\n",
       "      <td>11人关注 / 1个月以前发布</td>\n",
       "      <td>['https://hz.lianjia.com/ershoufang/1031098498...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>房产权50年，地铁口300米 2.民用电商用水， 3.精装修</td>\n",
       "      <td>盛奥西溪铭座</td>\n",
       "      <td>未来科技城</td>\n",
       "      <td>69.9万</td>\n",
       "      <td>单价15107元/平米</td>\n",
       "      <td>1室1厅 | 46.27平米 | 北 | 精装 | 低楼层(共12层)  | 板楼</td>\n",
       "      <td>72人关注 / 21天以前发布</td>\n",
       "      <td>['https://hz.lianjia.com/ershoufang/1031103575...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>英特学府 高楼层 三开间朝南 位置安静 房东诚心出售</td>\n",
       "      <td>金成英特学府</td>\n",
       "      <td>闲林</td>\n",
       "      <td>287万</td>\n",
       "      <td>单价32375元/平米</td>\n",
       "      <td>3室1厅 | 88.65平米 | 南 | 毛坯 | 高楼层(共18层)  | 板楼</td>\n",
       "      <td>24人关注 / 3个月以前发布</td>\n",
       "      <td>['https://hz.lianjia.com/ershoufang/1031086018...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            title    house   site totalPrice    unitPrice  \\\n",
       "0         此房满五年，自住精装，品牌家具家电，可拎包入住  华盛星洲翠谷     小和山       190万  单价21840元/平米   \n",
       "1       西湖边 ，精装修南北通透的西边套，1号线定安路地铁   涌金门社区      湖滨       180万  单价46178元/平米   \n",
       "2        自主精装，满五唯一，东边套，中高楼层，视野开阔！    水乡华庭      塘栖       159万  单价17646元/平米   \n",
       "3  房产权50年，地铁口300米 2.民用电商用水， 3.精装修  盛奥西溪铭座   未来科技城      69.9万  单价15107元/平米   \n",
       "4      英特学府 高楼层 三开间朝南 位置安静 房东诚心出售  金成英特学府      闲林       287万  单价32375元/平米   \n",
       "\n",
       "                                          house_info       followinfo  \\\n",
       "0     2室2厅 | 87平米 | 南 | 精装 | 低楼层(共11层) | 2009年建 | 板楼  58人关注 / 4个月以前发布   \n",
       "1  1室1厅 | 38.98平米 | 南 西 北 | 精装 | 高楼层(共6层) | 1985年...  51人关注 / 18天以前发布   \n",
       "2        2室2厅 | 90.11平米 | 南 北 | 精装 | 高楼层(共12层)  | 板楼  11人关注 / 1个月以前发布   \n",
       "3          1室1厅 | 46.27平米 | 北 | 精装 | 低楼层(共12层)  | 板楼  72人关注 / 21天以前发布   \n",
       "4          3室1厅 | 88.65平米 | 南 | 毛坯 | 高楼层(共18层)  | 板楼  24人关注 / 3个月以前发布   \n",
       "\n",
       "                                                link  \n",
       "0  ['https://hz.lianjia.com/ershoufang/1031080307...  \n",
       "1  ['https://hz.lianjia.com/ershoufang/1031104083...  \n",
       "2  ['https://hz.lianjia.com/ershoufang/1031098498...  \n",
       "3  ['https://hz.lianjia.com/ershoufang/1031103575...  \n",
       "4  ['https://hz.lianjia.com/ershoufang/1031086018...  "
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "# Load the CSV written by main() and preview the first rows.\n",
    "df = pd.read_csv('二手房.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>house</th>\n",
       "      <th>site</th>\n",
       "      <th>totalPrice</th>\n",
       "      <th>unitPrice</th>\n",
       "      <th>house_info</th>\n",
       "      <th>followinfo</th>\n",
       "      <th>link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "      <td>2250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>2238</td>\n",
       "      <td>1146</td>\n",
       "      <td>102</td>\n",
       "      <td>488</td>\n",
       "      <td>2178</td>\n",
       "      <td>2243</td>\n",
       "      <td>1036</td>\n",
       "      <td>2247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>清爽装修、五脏俱全一居室，满五唯一！</td>\n",
       "      <td>星汇花园</td>\n",
       "      <td>临平</td>\n",
       "      <td>225万</td>\n",
       "      <td>单价25396元/平米</td>\n",
       "      <td>2室1厅 | 48.03平米 | 南 | 精装 | 中楼层(共8层) | 1998年建 | 板楼</td>\n",
       "      <td>11人关注 / 1个月以前发布</td>\n",
       "      <td>['https://hz.lianjia.com/ershoufang/1031104588...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>150</td>\n",
       "      <td>33</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>19</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     title  house  site totalPrice    unitPrice  \\\n",
       "count                 2250   2250  2250       2250         2250   \n",
       "unique                2238   1146   102        488         2178   \n",
       "top     清爽装修、五脏俱全一居室，满五唯一！  星汇花园     临平       225万  单价25396元/平米   \n",
       "freq                     2     20   150         33            3   \n",
       "\n",
       "                                              house_info       followinfo  \\\n",
       "count                                               2250             2250   \n",
       "unique                                              2243             1036   \n",
       "top     2室1厅 | 48.03平米 | 南 | 精装 | 中楼层(共8层) | 1998年建 | 板楼  11人关注 / 1个月以前发布   \n",
       "freq                                                   2               19   \n",
       "\n",
       "                                                     link  \n",
       "count                                                2250  \n",
       "unique                                               2247  \n",
       "top     ['https://hz.lianjia.com/ershoufang/1031104588...  \n",
       "freq                                                    2  "
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column summary of the scraped table.\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "source": [],
    "metadata": {
     "collapsed": false
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}